* Set $root
* NOTE(review): the next line reads r(buildrunning) left behind by this -project-
* call, so no r-class command may be inserted between the two statements.
project figstabs, root
* When r(buildrunning) equals 0, include the interactive configuration file
* (presumably the case when this do-file is run outside a project build -- confirm).
if (r(buildrunning)==0) include "${root}/code/config_interactive.do"

* Set globals
* The globals file is first declared to -project- through uses() and then included,
* so whatever it defines is available to the rest of this do-file.
project, uses("${root}/code/set_globals.do")
include "${root}/code/set_globals.do"

* Create required subfolders
* -cap- swallows the error mkdir raises when the folder already exists; note that it
* also hides any other mkdir failure (e.g. a missing parent folder).
cap mkdir "${root}/results/Employment"

********************************************************************************
**# 1. Clean data
********************************************************************************

* Declare the input dataset to -project-, load it, and verify that
* county x industry x quartile x firm size x date uniquely identifies a row.
project, uses("${root}/data/dvc/Employment/PPP_Paychex_Earnin_Combined.dta")
use "${root}/data/dvc/Employment/PPP_Paychex_Earnin_Combined.dta", clear
gisid countyfips naics quartile size date

*------------------------------------------------------------------------------*
* Define eligibility groups
*------------------------------------------------------------------------------*
* eligible = 1 for size 100-499, 0 for size 500-799, missing for anything else.
* The assert below then guarantees that every row falls in one of the two bands.
generate eligible = cond(inrange(size, 100, 499), 1, cond(inrange(size, 500, 799), 0, .))
assert !missing(eligible)

*------------------------------------------------------------------------------*
* Reweighting by industry
*------------------------------------------------------------------------------*
* We use eligibility-level industry shares since we are not fully balanced on
* firm size bins X NAICS: if we reweighted at the firm size bin level, we could
* not simultaneously have the same industry composition across bins and
* industry shares that sum to 1 within bins.

* Totals of the weight variable (base): overall, by industry, by eligibility
* group, and by industry within eligibility group.
* NOTE(review): the totals run over every row currently in memory; base is
* presumably the January 2020 employment level -- confirm.
tempvar emp_all emp_ind emp_grp emp_ind_grp
gegen `emp_all' = total(base)
gegen `emp_ind' = total(base), by(naics)
gegen `emp_grp' = total(base), by(eligible)
gegen `emp_ind_grp' = total(base), by(naics eligible)

* Adjustment factor = (industry share overall) / (industry share within the
* row's own eligibility group).
generate naics_emp_share = (`emp_ind' / `emp_all') / (`emp_ind_grp' / `emp_grp')
assert !missing(naics_emp_share)
drop `emp_ind_grp' `emp_grp' `emp_ind' `emp_all'

* Rescale weights by NAICS share
replace base = base * naics_emp_share
	
* Check that industry composition is now equal for treated vs. control firms.
* Works on a preserved copy: computes each industry's share of the reweighted
* base within each eligibility group, reshapes to one row per NAICS with the two
* groups side by side, and asserts that the two shares agree to within 1E-7.
preserve
gegen tot_emp_naics_eligible = total(base), by(naics eligible)
gegen tot_emp_eligible = total(base), by(eligible)

gen naics_emp_share_eligible = tot_emp_naics_eligible / tot_emp_eligible
keep naics_emp_share_eligible naics eligible
gduplicates drop
greshape wide naics_emp_share_eligible, i(naics) j(eligible)

* An industry that appears in only one eligibility group ends up with a missing
* share after the reshape. inrange() treats missing bounds as -/+ infinity, so a
* missing naics_emp_share_eligible1 would let the tolerance check pass silently.
* Require both shares to be present before comparing them.
assert !mi(naics_emp_share_eligible0, naics_emp_share_eligible1)
assert inrange(naics_emp_share_eligible0, naics_emp_share_eligible1 - 1E-7, naics_emp_share_eligible1 + 1E-7)
restore

* Changes to June 2020: keep only rows dated 1-30 June 2020
keep if date >= td(01jun2020) & date <= td(30jun2020)

* Winsorize (upper tail only): cap the index at its base-weighted 99th percentile.
* The assert rules out missing values, which would otherwise compare as larger
* than the cap and be overwritten by it.
assert !missing(emp_index_combined)
tempname p99_cap
summarize emp_index_combined [w = base], detail
scalar `p99_cap' = r(p99)
replace emp_index_combined = `p99_cap' if emp_index_combined > `p99_cap'

* Base-weighted mean of the index for each firm size value
gcollapse (mean) emp_index_combined [w = base], by(size)


********************************************************************************
**# 2. Plot
********************************************************************************	

* Scatter of the collapsed index against firm size, with a dashed vertical
* reference line at size = 500 (the boundary between the two eligibility groups
* defined above), then export through the project's graph-export helper.
twoway (scatter emp_index_combined size), ///
	xline(500, lpattern(dash) lcolor(gs8)) ///
	xtitle("Firm Size: Number of Employees") ///
	ytitle("Change in Employment (%)" "from January to June 2020") ///
	xlabel(100 "100" 200 "200" 300 "300" 400 "400" 500 "500" 600 "600" 700 "700" 800 "800") ///
	ylabel(-10 "-10%" -20 "-20%" -30 "-30%" -40 "-40%", nogrid)

oi_graph_export "${root}/results/Employment/Change in Employment by Firm Size", type(${fig_type})
